package com.chenjun.parse;

import java.util.ArrayList;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.chenjun.request.HttpClientMethod;

/**
 * Extracts hyperlink targets from HTML text using jsoup.
 * <p>
 * The class holds no per-instance state: each call to
 * {@link #getHyperLinks(String)} parses its own input into a local document.
 */
public class HtmlParse
{
	/** URL fetched by {@link #main(String[])} when no command-line argument is given. */
	private static final String DEFAULT_TEST_URL = "http://localhost:8080/httpServer";

	/**
	 * Collects the {@code href} value of every hyperlink in an HTML document.
	 * <p>
	 * Only {@code <a>} elements that actually carry an {@code href} attribute
	 * are considered, so anchors without a target (for example
	 * {@code <a name="top">}) do not add empty entries to the result. The
	 * values are returned exactly as written in the markup, in document
	 * order; they are not resolved against any base URI.
	 *
	 * @param responseBody the HTML text to scan; may be {@code null}
	 * @return a new, mutable list with the {@code href} value of each
	 *         hyperlink; empty when {@code responseBody} is {@code null} or
	 *         contains no hyperlinks
	 */
	public List<String> getHyperLinks(String responseBody)
	{
		List<String> hrefs = new ArrayList<String>();
		if (responseBody == null)
		{
			// Nothing to parse; report "no links" instead of failing inside jsoup.
			return hrefs;
		}

		// Keep the parsed document local so the parser instance stays stateless
		// and does not retain the last DOM after the call returns.
		Document document = Jsoup.parse(responseBody);

		// "a[href]" matches only anchors that have an href attribute.
		Elements links = document.select("a[href]");
		for (Element link : links)
		{
			hrefs.add(link.attr("href"));
		}
		return hrefs;
	}

	/**
	 * Manual smoke test: fetches a page and prints each hyperlink it contains,
	 * one per line.
	 *
	 * @param args optional; {@code args[0]} is the URL to fetch. When absent,
	 *             {@value #DEFAULT_TEST_URL} is used.
	 */
	public static void main(String[] args)
	{
		String url = args.length > 0 ? args[0] : DEFAULT_TEST_URL;

		HtmlParse htmlParse = new HtmlParse();
		HttpClientMethod httpClientMethod = new HttpClientMethod();
		// NOTE(review): httpGetMethod is defined outside this file; whether it
		// can return null on failure is not visible here. getHyperLinks
		// tolerates a null body either way.
		String responseBody = httpClientMethod.httpGetMethod(url);
		List<String> hrefs = htmlParse.getHyperLinks(responseBody);
		for (String href : hrefs)
		{
			System.out.println(href);
		}
	}
}
